from llama_index.core import SimpleDirectoryReader, VectorStoreIndex
from llama_index.readers.file import PyMuPDFReader
from llama_index.core.node_parser import TokenTextSplitter
from llamaindex02 import show_list_obj
from dotenv import load_dotenv, find_dotenv

# Populate the process environment from a .env file (located by find_dotenv)
# before any llama_index component is built.
# NOTE(review): presumably this supplies the embedding-model API credentials
# used when the index is built below -- confirm against the project's .env.
_ = load_dotenv(find_dotenv())

# --- 1. Load: only PDF files under ./data, parsed with PyMuPDFReader ---------
pdf_extractors = {".pdf": PyMuPDFReader()}
reader = SimpleDirectoryReader(
    input_dir="./data",
    required_exts=[".pdf"],
    file_extractor=pdf_extractors,
)
documents = reader.load_data()

# --- 2. Split: chunk_size=300 and chunk_overlap=100 (both counted in tokens) -
parser = TokenTextSplitter(chunk_size=300, chunk_overlap=100)
nodes = parser.get_nodes_from_documents(documents=documents)

# --- 3. Index: build a vector index over the chunked nodes --------------------
index = VectorStoreIndex(nodes=nodes)

# --- 4. Retrieve: ask for the 2 most similar nodes to the query ---------------
vector_retriever = index.as_retriever(similarity_top_k=2)
query_text = "Llama2有多少参数"
results = vector_retriever.retrieve(query_text)

# Display the retrieved items with the helper imported from llamaindex02.
show_list_obj(results)
